library(ggplot2) # (Wickham, 2016)
library(tidyr) # (Wickham and Henry, 2020)
library(dplyr) # (Wickham et al., 2020)
library(reshape2) # (Wickham, 2007)
library(cowplot) # (Wilke, 2019)
library(hexbin) # (Carr, Lewin-Kog, Maechler, and Sarkar, 2020)
We conducted these analyses using the following computing environment:
print(version)
## _
## platform x86_64-apple-darwin15.6.0
## arch x86_64
## os darwin15.6.0
## system x86_64, darwin15.6.0
## status
## major 3
## minor 6.2
## year 2019
## month 12
## day 12
## svn rev 77560
## language R
## version.string R version 3.6.2 (2019-12-12)
## nickname Dark and Stormy Night
theme_set(theme_cowplot())
alpha <- 0.05
data_path <- "./data/agg_data.csv"
agg_data <- read.csv(data_path, na.strings="NONE")
agg_data$BIT_FLIP_PROB <- as.factor(agg_data$BIT_FLIP_PROB)
agg_data$DRIFT <- agg_data$TOURNAMENT_SIZE==1
# Label
chg_rate_label <- function(mag, interval, drift) {
if (drift) { return("drift") }
else if (interval == 0) { return("0") }
else { return(paste(mag, interval, sep="/")) }
}
agg_data$chg_rate_label <- factor(mapply(chg_rate_label,
agg_data$CHANGE_MAGNITUDE,
agg_data$CHANGE_FREQUENCY,
agg_data$DRIFT),
levels=c("drift", "0", "1/256", "1/128",
"1/64", "1/32", "1/16", "1/8",
"1/4", "1/2", "1/1", "2/1",
"4/1", "8/1", "16/1", "32/1",
"64/1", "128/1", "256/1",
"512/1", "1024/1", "2048/1",
"4096/1", "8192/1", "16384/1",
"32768/1"))
# Divide up the data into convenient partitions
data_gradient_phase0 <-
filter(agg_data,
GRADIENT_MODEL==1 & evo_phase == 0 & update == 50000)
data_gradient_phase1 <-
filter(agg_data,
GRADIENT_MODEL==1 & evo_phase == 1 & update == 60000)
data_nk_phase0 <-
filter(agg_data,
GRADIENT_MODEL==0 & evo_phase == 0 & update == 50000)
data_nk_phase1 <-
filter(agg_data,
GRADIENT_MODEL==0 & evo_phase == 1 & update == 60000)
We evolved populations for 50,000 generations under a range of environmental change rates.
Note issue with fitnesses for changing environments.
ggplot(data_gradient_phase0,
aes(x=chg_rate_label, y=fitness, color=chg_rate_label)) +
geom_boxplot() +
xlab("Environmental Change") +
scale_y_continuous(name="Fitness (of best organism)") +
ggtitle("Gradient Fitness Model") +
theme(legend.position="none")
ggplot(data_nk_phase0,
aes(x=chg_rate_label, y=fitness, color=chg_rate_label)) +
geom_boxplot() +
xlab("Env. bits flipped per generation") +
scale_y_continuous(name="Fitness (of best organism)") +
ggtitle("NK Fitness Model") +
theme(legend.position="none")
ggplot(data_gradient_phase0,
aes(x=chg_rate_label, y=coding_sites, color=chg_rate_label)) +
geom_boxplot() +
geom_jitter(aes(color=chg_rate_label), alpha=0.1) +
xlab("Environmental Change") +
scale_y_continuous(name="Coding Sites (in best genotype)",
limits=c(0, 130),
breaks=seq(0, 130, 20)) +
ggtitle("Gradient Fitness Model") +
theme(legend.position="none")
kt <- kruskal.test(formula = coding_sites ~ chg_rate_label, data=data_gradient_phase0)
kt
##
## Kruskal-Wallis rank sum test
##
## data: coding_sites by chg_rate_label
## Kruskal-Wallis chi-squared = 350.31, df = 7, p-value < 2.2e-16
if (kt$p.value <= alpha) {
wt <- pairwise.wilcox.test(x=data_gradient_phase0$coding_sites,
g=data_gradient_phase0$chg_rate_label,
exact=FALSE,
p.adjust.method = "bonferroni")
wt
}
##
## Pairwise comparisons using Wilcoxon rank sum test
##
## data: data_gradient_phase0$coding_sites and data_gradient_phase0$chg_rate_label
##
## drift 2/1 4/1 8/1 16/1 32/1 64/1
## 2/1 < 2e-16 - - - - - -
## 4/1 < 2e-16 1.7e-05 - - - - -
## 8/1 2.2e-14 1.4e-13 0.031 - - - -
## 16/1 1.2e-06 6.3e-16 2.6e-05 0.392 - - -
## 32/1 1.000 < 2e-16 2.1e-12 1.5e-06 0.058 - -
## 64/1 1.000 < 2e-16 < 2e-16 7.5e-15 4.4e-07 0.888 -
## 128/1 1.000 < 2e-16 < 2e-16 6.5e-16 7.8e-11 0.012 1.000
##
## P value adjustment method: bonferroni
ggplot(data_nk_phase0,
aes(x=chg_rate_label, y=coding_sites, color=chg_rate_label)) +
geom_boxplot() +
geom_jitter(alpha=0.1) +
xlab("Environmental Change") +
scale_y_continuous(name="# coding sites in best organism",
limits=c(0, 130),
breaks=seq(0, 130, 20)) +
ggtitle("NK Fitness Model") +
theme(legend.position="none")
kt <- kruskal.test(formula = coding_sites ~ chg_rate_label, data=data_nk_phase0)
kt
##
## Kruskal-Wallis rank sum test
##
## data: coding_sites by chg_rate_label
## Kruskal-Wallis chi-squared = 315.79, df = 7, p-value < 2.2e-16
if (kt$p.value <= alpha) {
wt <- pairwise.wilcox.test(x=data_nk_phase0$coding_sites,
g=data_nk_phase0$chg_rate_label,
exact=FALSE,
p.adjust.method = "bonferroni")
wt
}
##
## Pairwise comparisons using Wilcoxon rank sum test
##
## data: data_nk_phase0$coding_sites and data_nk_phase0$chg_rate_label
##
## drift 512/1 1024/1 2048/1 4096/1 8192/1 16384/1
## 512/1 < 2e-16 - - - - - -
## 1024/1 4.8e-07 1.1e-06 - - - - -
## 2048/1 1.00000 < 2e-16 0.00010 - - - -
## 4096/1 0.18768 < 2e-16 < 2e-16 1.4e-07 - - -
## 8192/1 0.02331 < 2e-16 < 2e-16 1.0e-07 1.00000 - -
## 16384/1 0.04611 < 2e-16 5.6e-16 3.2e-07 1.00000 1.00000 -
## 32768/1 0.00051 < 2e-16 < 2e-16 7.4e-11 0.29327 1.00000 1.00000
##
## P value adjustment method: bonferroni
ggplot(data_gradient_phase0,
aes(x=chg_rate_label, y=genome_length, color=chg_rate_label)) +
geom_boxplot() +
geom_jitter(alpha=0.1) +
xlab("Environmental Change") +
ylab("Genome Length") +
ylim(0, 1025) +
ggtitle("Gradient Fitness Model") +
theme(legend.position="none")
ggplot(data_nk_phase0,
aes(x=chg_rate_label, y=genome_length, color=chg_rate_label)) +
geom_boxplot() +
geom_jitter(alpha=0.1) +
xlab("Environmental Change") +
ylab("Genome Length") +
ylim(0, 1024) +
ggtitle("NK Fitness Model") +
theme(legend.position="none")
ggplot(data_gradient_phase0,
aes(x=chg_rate_label, y=neutral_sites, color=chg_rate_label)) +
geom_boxplot() +
geom_jitter(alpha=0.1) +
xlab("Environmental Change") +
ylab("Neutral Sites") +
ylim(-1, 1025) +
ggtitle("Gradient Fitness Model") +
theme(legend.position="none")
ggplot(data_nk_phase0,
aes(x=chg_rate_label, y=neutral_sites, color=chg_rate_label)) +
geom_boxplot() +
geom_jitter(alpha=0.1) +
xlab("Environmental Change") +
ylab("Neutral Sites") +
ylim(-1, 1025) + # jitter can jitter below 0
ggtitle("NK Fitness Model") +
theme(legend.position="none")
We lock-down genetic architectures and evolve for 10,000 generations in a random static environment.
ggplot(data_gradient_phase1,
aes(x=chg_rate_label, y=fitness, color=chg_rate_label)) +
geom_boxplot() +
geom_jitter(alpha=0.1) +
xlab("Environmental Change") +
scale_y_continuous(name="Fitness") +
ggtitle("Gradient Fitness Model (transfer)") +
theme(legend.position="none")
kt <- kruskal.test(formula = fitness ~ chg_rate_label, data=data_gradient_phase1)
kt
##
## Kruskal-Wallis rank sum test
##
## data: fitness by chg_rate_label
## Kruskal-Wallis chi-squared = 334.2, df = 7, p-value < 2.2e-16
if (kt$p.value <= alpha) {
wt <- pairwise.wilcox.test(x=data_gradient_phase1$fitness,
g=data_gradient_phase1$chg_rate_label,
exact=FALSE,
p.adjust.method = "bonferroni")
wt
}
##
## Pairwise comparisons using Wilcoxon rank sum test
##
## data: data_gradient_phase1$fitness and data_gradient_phase1$chg_rate_label
##
## drift 2/1 4/1 8/1 16/1 32/1 64/1
## 2/1 < 2e-16 - - - - - -
## 4/1 < 2e-16 5.7e-05 - - - - -
## 8/1 9.9e-11 1.0e-11 0.09164 - - - -
## 16/1 0.00027 < 2e-16 1.9e-05 0.38986 - - -
## 32/1 1.00000 < 2e-16 5.4e-13 3.4e-07 0.00934 - -
## 64/1 1.00000 < 2e-16 < 2e-16 1.3e-14 2.0e-07 1.00000 -
## 128/1 0.02480 < 2e-16 < 2e-16 < 2e-16 1.4e-10 0.03334 1.00000
##
## P value adjustment method: bonferroni
ggplot(data_nk_phase1,
aes(x=chg_rate_label, y=fitness, color=chg_rate_label)) +
geom_boxplot() +
geom_jitter(alpha=0.1) +
xlab("Environmental Change") +
scale_y_continuous(name="Fitness") +
ggtitle("NK Fitness Model (transfer)") +
theme(legend.position="none")
kt <- kruskal.test(formula = fitness ~ chg_rate_label, data=data_nk_phase1)
kt
##
## Kruskal-Wallis rank sum test
##
## data: fitness by chg_rate_label
## Kruskal-Wallis chi-squared = 281.78, df = 7, p-value < 2.2e-16
if (kt$p.value <= alpha) {
wt <- pairwise.wilcox.test(x=data_nk_phase1$fitness,
g=data_nk_phase1$chg_rate_label,
exact=FALSE,
p.adjust.method = "bonferroni")
wt
}
##
## Pairwise comparisons using Wilcoxon rank sum test
##
## data: data_nk_phase1$fitness and data_nk_phase1$chg_rate_label
##
## drift 512/1 1024/1 2048/1 4096/1 8192/1 16384/1
## 512/1 2.5e-11 - - - - - -
## 1024/1 0.00096 0.00093 - - - - -
## 2048/1 1.00000 5.1e-13 0.00154 - - - -
## 4096/1 0.03019 < 2e-16 < 2e-16 3.6e-07 - - -
## 8192/1 0.00377 < 2e-16 2.8e-16 4.5e-08 1.00000 - -
## 16384/1 0.00288 < 2e-16 3.2e-16 4.7e-08 1.00000 1.00000 -
## 32768/1 0.00027 < 2e-16 < 2e-16 2.2e-10 1.00000 1.00000 1.00000
##
## P value adjustment method: bonferroni
ggplot(data_gradient_phase1,
aes(x=coding_sites, y=fitness)) +
geom_point(aes(color=chg_rate_label)) +
xlab("Coding Sites") +
ylab("Fitness") +
theme(legend.position="top") +
ggtitle("Gradient Fitness Model")
cor.test(x=data_gradient_phase1$coding_sites,
y=data_gradient_phase1$fitness,
method="spearman",
exact=FALSE)
##
## Spearman's rank correlation rho
##
## data: data_gradient_phase1$coding_sites and data_gradient_phase1$fitness
## S = 2352075, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.9724366
ggplot(data_nk_phase1,
aes(x=coding_sites, y=fitness)) +
geom_point(aes(color=chg_rate_label)) +
xlab("Coding Sites") +
ylab("Fitness") +
theme(legend.position="top") +
ggtitle("NK Fitness Model")
cor.test(x=data_nk_phase1$coding_sites,
y=data_nk_phase1$fitness,
method="spearman",
exact=FALSE)
##
## Spearman's rank correlation rho
##
## data: data_nk_phase1$coding_sites and data_nk_phase1$fitness
## S = 3563060, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.9582453
ggplot(data_gradient_phase0,
aes(x=chg_rate_label, y=coding_sites)) +
geom_boxplot() +
geom_jitter(alpha=0.1) +
xlab("Environmental Change") +
scale_y_continuous(name="Coding Sites\n(best genotype)",
limits=c(0, 130),
breaks=seq(0, 130, 20)) +
theme(legend.position="none") +
ggsave("./imgs/coding-sites-v-chg-rate-gradient-phase0.pdf", width=10, height=7)
ggplot(data_gradient_phase1,
aes(x=chg_rate_label, y=fitness)) +
geom_boxplot() +
xlab("Environmental Change") +
scale_y_continuous(name="Fitness") +
# ggtitle("Gradient Fitness Model (phase 2)") +
theme(legend.position="none") +
ggsave("./imgs/fitness-v-chg-rate-gradient-phase1.pdf", width=10, height=7)
ggplot(data_gradient_phase1,
aes(x=coding_sites, y=fitness)) +
geom_hex(alpha=0.95) +
scale_fill_continuous(type = "viridis",
trans="log",
name="Count",
breaks=c(1, 10, 100)) +
scale_x_continuous(name="Coding Sites",
limits=c(0, 130),
breaks=seq(0, 130, 20)) +
ylab("Fitness") +
ggsave("./imgs/coding-sites-v-fitness-gradient-phase1-hex.pdf", width=10, height=7)
ggplot(data_gradient_phase1,
aes(x=coding_sites, y=fitness)) +
geom_density_2d() +
stat_density_2d(aes(fill = ..level..),
geom = "polygon",
colour="white") +
geom_jitter(alpha=0.05) +
scale_fill_continuous(type = "viridis",
trans="log",
name="Count") +
ylab("Fitness") +
theme(legend.position = "none")
ggplot(data_gradient_phase1,
aes(x=coding_sites, y=fitness)) +
geom_point(alpha=0.95) +
scale_x_continuous(name="Coding Sites",
limits=c(0, 130),
breaks=seq(0, 130, 20)) +
ylab("Fitness")